Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """Restriction Enzyme classes. 
  12   
  13  Notes about the various classes of the restriction enzyme implementation:: 
  14   
  15              RestrictionType is the type of all restriction enzymes. 
  16          ---------------------------------------------------------------------------- 
  17              AbstractCut implements some methods that are common to all enzymes. 
  18          ---------------------------------------------------------------------------- 
  19              NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  20                                      produced by the enzyme. 
  21                                      they correspond to the 4th field of the 
  22                                      rebase record emboss_e.NNN. 
  23                      0->NoCut    : the enzyme is not characterised. 
  24                      2->OneCut   : the enzyme produces one double strand cut. 
  25                      4->TwoCuts  : two double strand cuts. 
  26          ---------------------------------------------------------------------------- 
  27              Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  28                                      the enzyme. 
  29                                      Not implemented yet. 
  30          ---------------------------------------------------------------------------- 
  31              Palindromic,            if the site is palindromic or not. 
  32              NotPalindromic          allow some optimisations of the code. 
  33                                      No need to check the reverse strand 
  34                                      with palindromic sites. 
  35          ---------------------------------------------------------------------------- 
  36              Unknown, Blunt,         represent the overhang. 
  37              Ov5, Ov3                Unknown is here for symmetry reasons and 
  38                                      correspond to enzymes that are not 
  39                                      characterised in rebase. 
  40          ---------------------------------------------------------------------------- 
  41              Defined, Ambiguous,     represent the sequence of the overhang. 
  42              NotDefined 
  43                                      NotDefined is for enzymes not characterised 
  44                                      in rebase. 
  45   
  46                                      Defined correspond to enzymes that display 
  47                                      a constant overhang whatever the sequence. 
  48                                      ex : EcoRI. G^AATTC -> overhang :AATT 
  49                                                  CTTAA^G 
  50   
  51                                      Ambiguous : the overhang varies with the 
  52                                      sequence restricted. 
  53                                      Typically enzymes which cut outside their 
  54                                      restriction site or (but not always) 
  55                                      inside an ambiguous site. 
  56                                      ex: 
  57                                      AcuI CTGAAG(22/20)  -> overhang : NN 
  58                                      AasI GACNNN^NNNGTC  -> overhang : NN 
  59                                           CTGN^NNNNNCAG 
  60   
  61                  note : these 3 classes refer to the overhang, not the site. 
  62                     So the enzyme ApoI (RAATTY) is defined even if its 
  63                     restriction site is ambiguous. 
  64   
  65                          ApoI R^AATTY -> overhang : AATT -> Defined 
  66                               YTTAA^R 
  67                     Accordingly, blunt enzymes are always Defined even 
  68                     when they cut outside their restriction site. 
  69          ---------------------------------------------------------------------------- 
  70              Not_available,          as found in rebase file emboss_r.NNN files. 
  71              Commercially_available 
  72                                      allow the selection of the enzymes 
  73                                      according to their suppliers to reduce the 
  74                                      quantity of results. 
  75                                      Also will allow the implementation of 
  76                                      buffer compatibility tables. Not 
  77                                      implemented yet. 
  78   
  79                                      the list of suppliers is extracted from 
  80                                      emboss_s.NNN 
  81          ---------------------------------------------------------------------------- 
  82   
  83  """ 
  84   
  85  from __future__ import print_function 
  86   
  87  import warnings 
  88   
  89  from Bio._py3k import zip 
  90  from Bio._py3k import filter 
  91  from Bio._py3k import range 
  92   
  93  import re 
  94  import itertools 
  95   
  96  from Bio.Seq import Seq, MutableSeq 
  97  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  98  from Bio.Restriction.Restriction_Dictionary import typedict 
  99  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
 100  from Bio.Restriction.PrintFormat import PrintFormat 
 101  from Bio import BiopythonWarning 
102 103 104 # Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 105 # namespace), but have deprecated that module. 106 107 108 -def _check_bases(seq_string):
109 """Check characters in a string (PRIVATE). 110 111 Remove digits and white space present in string. Allows any valid ambiguous 112 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 113 114 Other characters (e.g. symbols) trigger a TypeError. 115 116 Returns the string WITH A LEADING SPACE (!). This is for backwards 117 compatibility, and may in part be explained by the fact that 118 Bio.Restriction doesn't use zero based counting. 119 """ 120 # Remove white space and make upper case: 121 seq_string = "".join(seq_string.split()).upper() 122 # Remove digits 123 for c in "0123456789": 124 seq_string = seq_string.replace(c, "") 125 # Check only allowed IUPAC letters 126 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")): 127 raise TypeError("Invalid character found in %s" % repr(seq_string)) 128 return " " + seq_string
# Lookup table keyed by IUPAC nucleotide code.  Each value is a string of
# IUPAC codes; judging by the entries, these are the codes that share at
# least one base with the key (e.g. 'A' pairs with R, W, M, H, V, D and N).
# NOTE(review): the consumers of this table are not visible here - confirm
# the intended meaning against them.
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}

# Module-level alias for Bio.Seq.Seq.
DNA = Seq
class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly: remove anything which is not IUPAC alphabet and then add a space
    in front of the sequence to get a biological index instead of a
    python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule linear (default) or
    circular. Restriction sites are searched over the edges of circular
    sequences.

    Attributes set by the constructor:
     - data     -> upper case sequence string with one leading space.
     - lower    -> True if the original sequence text was all lower case.
     - linear   -> True for a linear molecule, False for a circular one.
     - klass    -> class of the original sequence, used to build slices.
     - alphabet -> alphabet of the original sequence.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        if seq is a FormattedSeq, linear will have no effect on the
        shape of the sequence.

        Raises TypeError for any other kind of argument.
        """
        if isinstance(seq, (Seq, MutableSeq)):
            stringy = str(seq)
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        elif isinstance(seq, FormattedSeq):
            # Copy constructor: the shape of the source sequence wins.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return the number of bases (the leading space is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        """Return a string showing the sequence slice and the shape flag."""
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """Return True if other is a FormattedSeq with the same content.

        The stored attributes are compared directly instead of comparing
        repr() output, so the result does not depend on how the wrapped
        sequence class chooses to print itself.
        NOTE(review): Bio.Seq.Seq.__repr__ is understood to abbreviate long
        sequences, which would make distinct long sequences look equal when
        comparing repr() strings - confirm against the Bio.Seq documentation.
        """
        if not isinstance(other, FormattedSeq):
            return False
        return (self.data == other.data
                and self.linear == other.linear
                and self.lower == other.lower
                and self.klass is other.klass
                # alphabets are compared through their repr, as the previous
                # repr based comparison effectively did.
                and repr(self.alphabet) == repr(other.alphabet))

    def __ne__(self, other):
        """Return the negation of __eq__ (needed explicitly on Python 2)."""
        return not self.__eq__(other)

    def circularise(self):
        """Circularise sequence in place."""
        self.linear = False
        return

    def linearise(self):
        """Linearise sequence in place."""
        self.linear = True
        return

    def to_linear(self):
        """Make a new instance of sequence as linear."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """Make a new instance of sequence as circular."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """Return if sequence is linear (True) or circular (False)."""
        return self.linear

    def finditer(self, pattern, size):
        """Return a list of a given pattern which occurs in the sequence.

        The list is made of tuple (location, pattern.group).
        The latter is the bound ``group`` method of the match object and is
        used with non palindromic sites.
        Pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        Size is the size of the restriction enzyme recognition-site size.
        """
        if self.is_linear():
            data = self.data
        else:
            # Circular: append the first size - 1 bases so that sites which
            # span the origin can be matched.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return self.data[i] wrapped in the original sequence class.

        The case of the original sequence is restored when it was lower case.
        """
        piece = self.data[i]
        if self.lower:
            piece = piece.lower()
        return self.klass(piece, self.alphabet)
238
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes are derived.

    Implement the operator methods.
    """

    def __init__(cls, name='', bases=(), dct=None):
        """Initialize RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.
        See below.

        Raises ValueError if the name contains a hyphen or if the class
        attribute ``compsite`` cannot be compiled as a regular expression.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # 2011/11/26 - Nobody knows what this call was supposed to accomplish,
        # but all unit tests seem to pass without it.
        # super(RestrictionType, cls).__init__(cls, name, bases, dct)
        try:
            # Replace the pattern text by its compiled form.
            cls.compsite = re.compile(cls.compsite)
        except AttributeError:
            # Can happen if initialised wrongly.
            # (This was seen when Sphinx api-doc imports the classes, and
            # tried to automatically general documentation for them)
            pass
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(cls.compsite))

    def __add__(cls, other):
        """Add restriction enzyme to a RestrictionBatch().

        If other is an enzyme returns a batch of the two enzymes.
        If other is already a RestrictionBatch add enzyme to it.
        Any other operand raises TypeError.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError

    def __div__(cls, other):
        """Override '/' operator to use as search method.

        >>> EcoRI/Seq('GAATTC')
        [2]
        Returns RE.search(other).
        """
        return cls.search(other)

    def __rdiv__(cls, other):
        """Override division with reversed operands to use as search method.

        >>> Seq('GAATTC')/EcoRI
        [2]
        Returns RE.search(other).
        """
        return cls.search(other)

    def __truediv__(cls, other):
        """Override Python 3 division operator to use as search method.

        Like __div__.
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """As __truediv___, with reversed operands.

        Like __rdiv__.
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """Override '//' operator to use as catalyse method.

        >>> EcoRI//Seq('GAATTC')
        (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))
        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """As __floordiv__, with reversed operands.

        >>> Seq('GAATTC')//EcoRI
        (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))
        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """Return the name of the enzyme as string."""
        return cls.__name__

    def __repr__(cls):
        """Implement repr method.

        Used with eval or exec will instantiate the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """Return length of recognition site of enzyme as int."""
        try:
            return cls.size
        except AttributeError:
            # Happens if the instance was not initialised as expected.
            # e.g. if instance created by a documentation framework
            # like Sphinx trying to inspect the class automatically,
            # Also seen within IPython.
            return 0

    def __hash__(cls):
        """Return id(cls), consistent with the identity based __eq__."""
        # Python default is to use id(...)
        # This is consistent with the __eq__ implementation
        return id(cls)

    def __eq__(cls, other):
        """Override '==' operator.

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        # assert (id(cls)==id(other)) == (other is cls) == (cls is other)
        return id(cls) == id(other)

    def __ne__(cls, other):
        """Override '!=' operator.

        Isoschizomer strict (same recognition site, same restriction) -> False
        All the other-> True

        WARNING - This is not the inverse of the __eq__ method
        >>> SacI != SstI  # true isoschizomers
        False
        >>> SacI == SstI
        False
        """
        if not isinstance(other, RestrictionType):
            return True
        # Two enzymes only count as "not different" when their ``charac``
        # attributes compare equal.
        return not (cls.charac == other.charac)

    def __rshift__(cls, other):
        """Override '>>' operator to test for neoschizomers.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                             -> False
        >>> SmaI >> XmaI
        True
        """
        if not isinstance(other, RestrictionType):
            return False
        elif cls.site == other.site and cls.charac != other.charac:
            return True
        else:
            return False

    def __mod__(cls, other):
        """Override '%' operator to test for compatible overhangs.

        True if a and b have compatible overhang.
        Raises TypeError if other is not an enzyme.
        >>> XhoI % SalI
        True
        """
        if not isinstance(other, RestrictionType):
            raise TypeError(
                'expected RestrictionType, got %s instead' % type(other))
        return cls._mod1(other)

    def __ge__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>='. a is greater or equal than b if the a site is longer
        than b site. If their site have the same length sort by alphabetical
        order of their names.
        Raises NotImplementedError if other is not an enzyme.
        >>> EcoRI.size
        6
        >>> EcoRV.size
        6
        >>> EcoRI >= EcoRV
        False
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        # len() is used on both sides (as in __le__ and __lt__) so that a
        # class without a ``size`` attribute is treated as length 0 instead
        # of raising AttributeError.
        if len(cls) > len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ >= other.__name__:
            return True
        else:
            return False

    def __gt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>'. Sorting order:
              1. size of the recognition site.
              2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        # See __ge__ for why len() is used on both sides.
        if len(cls) > len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ > other.__name__:
            return True
        else:
            return False

    def __le__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<='. Sorting order:
              1. size of the recognition site.
              2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        elif len(cls) < len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ <= other.__name__:
            return True
        else:
            return False

    def __lt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<'. Sorting order:
              1. size of the recognition site.
              2. if equal size, alphabetical order of the names.

        Raises NotImplementedError if other is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        elif len(cls) < len(other):
            return True
        elif len(cls) == len(other) and cls.__name__ < other.__name__:
            return True
        else:
            return False
488
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """Return a list of cutting sites of the enzyme in the sequence.

        Compensate for circular sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (a FormattedSeq is used as is, in which case linear is ignored).

        If linear is False, the restriction sites that span over the boundaries
        will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.

        The formatted sequence is stored on the class as ``cls.dna``.
        """
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """Print all the suppliers of restriction enzyme."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """Test for real isoschizomer.

        True if other is an isoschizomer of RE, but not an neoschizomer,
        else False.

        Equischizomer: same site, same position of restriction.
        >>> SacI.is_equischizomer(SstI)
        True
        >>> SmaI.is_equischizomer(XmaI)
        False

        """
        # '!=' on enzymes is False only for strict isoschizomers.
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """Test for neoschizomer.

        True if other is a neoschizomer of RE, else False.
        Neoschizomer: same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """Test for same recognition site.

        True if other has the same recognition site, else False.

        Isoschizomer: same site.
        >>> SacI.is_isoschizomer(SstI)
        True
        >>> SmaI.is_isoschizomer(XmaI)
        True

        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """List equischizomers of the enzyme.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.
        If batch is supplied it is used instead of the default AllEnzymes.
        The batch does not need to contain RE.

        Equischizomer: same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # The enzyme matches itself; drop it, but only if the batch actually
        # contained it (a user supplied batch may not).
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """List neoschizomers of the enzyme.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.

        Neoschizomer: same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = sorted(x for x in batch if cls >> x)
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """List all isoschizomers of the enzyme.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, RE itself excluded.
        If batch is supplied it is used instead of the default AllEnzymes.
        The batch does not need to contain RE.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # Same guard as in equischizomers(): RE may be absent from the batch.
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """Return the theoretically cutting frequency of the enzyme.

        Frequency of the site, given as 'one cut per x bases' (int).
        """
        return cls.freq
626
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the start
    of the restriction site.

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the reported position for a site match (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.  No cut offset is applied for
        this class, so location is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """Yield the reported position for a reverse strand match (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.  location is yielded unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:
            - fst5 -> first 5' cut (current strand) or None
            - fst3 -> first 3' cut (complementary strand) or None
            - scd5 -> second 5' cut (current strand) or None
            - scd3 -> second 3' cut (complementary strand) or None
            - site -> recognition site.

        All four cut positions are None for this class.
        """
        return (None,) * 4 + (cls.site,)
714
class OneCut(AbstractCut):
    """Implement the methods for enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always True for this class.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the cut position for a site match (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.  The single value yielded is
        location shifted forward by ``cls.fst5``.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        cut = location + cls.fst5
        yield cut

    @classmethod
    def _rev_modify(cls, location):
        """Yield the cut position for a reverse strand match (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: location is shifted back by ``cls.fst3``.
        """
        cut = location - cls.fst3
        yield cut

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:
            - fst5 -> first 5' cut (current strand) or None
            - fst3 -> first 3' cut (complementary strand) or None
            - scd5 -> second 5' cut (current strand) or None
            - scd3 -> second 3' cut (complementary strand) or None
            - site -> recognition site.

        The two "second cut" entries are None for this class.
        """
        first5, first3 = cls.fst5, cls.fst3
        return first5, first3, None, None, cls.site
792
class TwoCuts(AbstractCut):
    """Implement the methods for enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always True for this class.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """Yield both cut positions for a site match (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.  Two values are yielded, in this
        order: location shifted forward by ``cls.fst5`` and then by
        ``cls.scd5``.
        """
        # Attributes are read lazily, one per yielded value.
        for offset_name in ("fst5", "scd5"):
            yield location + getattr(cls, offset_name)

    @classmethod
    def _rev_modify(cls, location):
        """Yield both cut positions for a reverse strand match (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: location is shifted back by ``cls.fst3``
        and then by ``cls.scd3``.
        """
        for offset_name in ("fst3", "scd3"):
            yield location - getattr(cls, offset_name)

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:
            - fst5 -> first 5' cut (current strand) or None
            - fst3 -> first 3' cut (complementary strand) or None
            - scd5 -> second 5' cut (current strand) or None
            - scd3 -> second 3' cut (complementary strand) or None
            - site -> recognition site.

        """
        fields = ("fst5", "fst3", "scd5", "scd3", "site")
        return tuple(getattr(cls, field) for field in fields)
872
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """Return if recognition site can be methylated.

        Always True for this class.
        """
        return True
887
class Meth_Undep(AbstractCut):
    """Implement information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """Return if recognition site can be methylated.

        Always False for this class.
        """
        return False
902
class Palindromic(AbstractCut):
    """Implement methods for enzymes with palindromic recognition sites.

    Palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for palindromic enzymes: every match of
        ``cls.compsite`` in ``cls.dna`` is converted to its cut position(s)
        with ``cls._modify``.  The list is stored as ``cls.results`` and,
        when it is not empty, ``cls._drop()`` is called before returning it.
        """
        matches = cls.dna.finditer(cls.compsite, cls.size)
        cuts = []
        # The second item of each match (the group accessor) is not needed
        # here: both strands carry the same site.
        for start, _ in matches:
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cls.results:
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Return if the enzyme has a palindromic recognition site."""
        return True
934
class NonPalindromic(AbstractCut):
    """Implement methods for enzymes with non-palindromic recognition sites.

    Palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for non palindromic enzymes.  Matches for
        which the match group named after the enzyme is set are converted
        with ``cls._modify``, all the others with ``cls._rev_modify``; the
        latter are also kept in ``cls.on_minus``.  The combined list is
        stored as ``cls.results``, sorted, and ``cls._drop()`` is called when
        it is not empty.
        """
        matches = cls.dna.finditer(cls.compsite, cls.size)
        forward_cuts = cls.results = []
        shift_forward = cls._modify
        shift_reverse = cls._rev_modify
        enzyme_name = str(cls)
        reverse_cuts = cls.on_minus = []

        for start, group in matches:
            # group is the match object's group() accessor; it is asked for
            # the group named after the enzyme.
            # NOTE(review): presumably that named group marks the forward
            # orientation of the site - confirm where compsite is built.
            if group(enzyme_name):
                forward_cuts.extend(shift_forward(start))
            else:
                reverse_cuts.extend(shift_reverse(start))
        forward_cuts.extend(reverse_cuts)

        if cls.results:
            cls.results.sort()
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Return if the enzyme has a palindromic recognition site."""
        return False
976
class Unknown(AbstractCut):
    """Implement methods for enzymes that produce unknown overhangs.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        For this class the way the enzyme cuts is not known, so no fragments
        can be computed: the method always raises NotImplementedError and the
        arguments are not used.
        """
        message = '%s restriction is unknown.' % cls.__name__
        raise NotImplementedError(message)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_5overhang()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:
            - RE.is_5overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always answers "unknown".
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls):
        """List all enzymes that produce compatible ends for the enzyme.

        Always an empty list for this class.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Always False for this class.
        """
        return False
1068
class Blunt(AbstractCut):
    """Implement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # cls.dna is read after the search; presumably search() stores the
        # formatted sequence there -- TODO confirm.
        seq = cls.dna
        if not cuts:
            # No site found: a single fragment.
            return (seq[1:],)
        # Fragments delimited by two consecutive cuts.
        inner = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: one fragment bridges the last cut to the first cut.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always True for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always answers "blunt".
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        batch is the collection of enzymes to look in. When it is omitted
        (or empty) AllEnzymes is used instead.

        Return the sorted list of the enzymes of that collection for which
        is_blunt() is true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch; the previous code always
        # scanned AllEnzymes and ignored the argument.
        return sorted(x for x in iter(batch) if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only

        True when other is a subclass of Blunt.
        """
        return issubclass(other, Blunt)
1197
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # cls.dna is read after the search; presumably search() stores the
        # formatted sequence there -- TODO confirm.
        seq = cls.dna
        if not cuts:
            # No site found: a single fragment.
            return (seq[1:],)
        # Fragments delimited by two consecutive cuts.
        inner = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: one fragment bridges the last cut to the first cut.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always True for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always answers "5' overhang".
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        batch is the collection of enzymes to look in. When it is omitted
        (or empty) AllEnzymes is used instead.

        Return the sorted list of the enzymes x of that collection for
        which x.is_5overhang() and ``x % cls`` are both true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch; the previous code always
        # scanned AllEnzymes and ignored the argument.
        return sorted(x for x in iter(batch) if x.is_5overhang() and
                      x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Only another Ov5 enzyme can be compatible; the decision is then
        delegated to cls._mod2(other).
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
1329
class Ov3(AbstractCut):
    """Implement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # cls.dna is read after the search; presumably search() stores the
        # formatted sequence there -- TODO confirm.
        seq = cls.dna
        if not cuts:
            # No site found: a single fragment.
            return (seq[1:],)
        # Fragments delimited by two consecutive cuts.
        inner = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: one fragment bridges the last cut to the first cut.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always True for this class.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always answers "3' overhang".
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        batch is the collection of enzymes to look in. When it is omitted
        (or empty) AllEnzymes is used instead.

        Return the sorted list of the enzymes x of that collection for
        which x.is_3overhang() and ``x % cls`` are both true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch; the previous code always
        # scanned AllEnzymes and ignored the argument.
        return sorted(x for x in iter(batch) if x.is_3overhang() and
                      x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Only another Ov3 enzyme can be compatible; the decision is then
        delegated to cls._mod2(other).
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
1464
class Defined(AbstractCut):
    """Implement methods for enzymes with defined recognition site and cut.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Linear sequence: leading positions <= 1 and everything from the
        first position greater than the sequence length onwards are
        discarded. Circular sequence: positions below 1 at the start and
        positions above the length at the end are shifted by the length
        instead of being discarded.
        """
        #
        # The cut positions are obtained by adding an offset to the site
        # position (_modify / _rev_modify), so they can fall outside the
        # sequence even though the site itself is inside.
        #
        size = len(cls.dna)
        if cls.dna.is_linear():
            tail = itertools.dropwhile(lambda pos: pos <= 1, cls.results)
            cls.results = list(
                itertools.takewhile(lambda pos: pos <= size, tail))
            return
        for front, pos in enumerate(cls.results):
            if pos >= 1:
                break
            cls.results[front] += size
        # Walk a reversed snapshot; ``back`` counts from the end (1-based).
        for back, pos in enumerate(cls.results[::-1], 1):
            if pos <= size:
                break
            cls.results[-back] -= size
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always True for this class.

        Related methods:
         - RE.is_ambiguous()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        Related methods:
         - RE.is_defined()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:
         - RE.is_defined()
         - RE.is_ambiguous()

        """
        return False

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        plus = cls.fst5
        minus = cls.fst3
        site = cls.site
        both_zero = plus == 0 and minus == 0
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if both_zero:
                return 'N^' + site + '_N'
            if minus == 0:
                return site[:plus] + '^' + site[plus:] + '_N'
            return (site[:plus] + '^' + site[plus:minus] + '_' +
                    site[minus:])
        if cls.is_blunt():
            return site[:plus] + '^_' + site[plus:]
        # Remaining case: neither 5' overhang nor blunt.
        if both_zero:
            return 'N_' + site + '^N'
        return site[:minus] + '_' + site[minus:plus] + '^' + site[plus:]

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        True when both overhang sequences are equal. Otherwise, if other
        is Ambiguous, the answer is whatever other._mod2(cls) says.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(cls)
        return False
1615
class Ambiguous(AbstractCut):
    """Implement methods for enzymes that produce variable overhangs.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.

    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Drop the site that are situated outside the sequence in linear
        sequence. Modify the index for site in circular sequences.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = [x for x in drop(lambda x: x <= 1, cls.results)]
            cls.results = [x for x in take(lambda x: x <= length, cls.results)]
        else:
            # Positions before the origin are shifted forward ...
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # ... and positions past the end are shifted backward, walking
            # a reversed copy so that index counts from the end.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        Related methods:
         - RE.is_ambiguous()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always True for this class.

        Related methods:
         - RE.is_defined()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:
         - RE.is_defined()
         - RE.is_ambiguous()

        """
        return False

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Overhangs of different length are never compatible. Otherwise the
        overhang of cls is turned into a regular expression ('N' becomes
        '.', the other ambiguity codes become a character class taken from
        ``matching``) which is matched against the overhang of other.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        pattern = cls.ovhgseq
        for base in cls.ovhgseq:
            # Plain A/T/C/G are left untouched.
            if base in 'N':
                pattern = pattern.replace('N', '.')
            if base in 'RYWMSKHDBV':
                pattern = pattern.replace(base, '[' + matching[base] + ']')
        return bool(re.match(pattern, other.ovhgseq))

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        Raises ValueError for a blunt cutter whose cut position is neither
        before the site nor after its end.
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # The result is called ``rep`` (not ``re``) so that the regular
        # expression module is not shadowed inside this method.
        if cls.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # Was "'N^' * abs(f5) * 'N'", i.e. a str multiplied by a
                # str, which raises TypeError. The prefix has to be
                # concatenated like in the neighbouring branches.
                rep = ('N^' + abs(f5) * 'N' + '_' +
                       abs(length + f3) * 'N' + site)
            elif f5 > length:
                rep = (site + (f5 - length) * 'N' + '^' +
                       (length + f3 - f5) * 'N' + '_N')
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                # NOTE(review): relies on a ``name`` attribute of the
                # enzyme class -- confirm it exists (``__name__`` is used
                # elsewhere).
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = (site[:f3] + '_' + site[f3:] +
                       (f5 - length) * 'N' + '^N')
            elif 0 <= f5 <= length:
                rep = ('N_' + 'N' * (f3 + length) + site[:f5] + '^' +
                       site[f5:])
            elif f3 > 0:
                rep = (site + f3 * 'N' + '_' +
                       (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^' +
                       abs(f5) * 'N' + site)
            else:
                rep = ('N_' + abs(f3 + length) * 'N' + site +
                       (f5 - length) * 'N' + '^N')
        return rep
1801
class NotDefined(AbstractCut):
    """Implement methods for enzymes with non-characterized overhangs.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Nothing is done for a linear sequence. For a circular sequence the
        leading positions below 1 are increased by the sequence length and
        the trailing positions above the length are decreased by it.
        """
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Walk the results from the end. The slice must be the reversed
        # copy [::-1]; the previous [:-1] walked them from the start and
        # therefore tested one element while modifying another.
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index + 1)] -= length
            else:
                break
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        Related methods:
         - RE.is_ambiguous()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        Related methods:
         - RE.is_defined()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always True for this class.

        Related methods:
         - RE.is_defined()
         - RE.is_ambiguous()

        """
        return True

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Always raises ValueError: the overhang is not defined, so no
        compatibility can be computed.
        """
        #
        # Normally we should not arrive here. The overhang is not defined,
        # so the enzyme is compatible with nobody; an error is raised
        # rather than quietly returning False.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        For this class the site is simply wrapped in question marks.
        """
        return '? %s ?' % cls.site
1918
class Commercially_available(AbstractCut):
    """Implement methods for enzymes which are commercially available.

    Internal use only. Not meant to be instantiated.
    """

    #
    # Recent additions to Rebase make this naming convention uncertain.
    # "Enzymes which have a supplier" may be a better description.
    #

    @classmethod
    def suppliers(cls):
        """Print the name of each supplier of the enzyme, one per line.

        Every name is followed by a comma. Returns None.
        """
        for code in cls.suppl:
            print(suppliers_dict[code][0] + ',')
        return

    @classmethod
    def supplier_list(cls):
        """Return the names of the suppliers of the enzyme as a list.

        The names are collected in the iteration order of suppliers_dict.
        """
        names = []
        for code, entry in suppliers_dict.items():
            if code in cls.suppl:
                names.append(entry[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet: returns None.
        """
        return

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available.

        Always True for this class.
        """
        return True
1958
class Not_available(AbstractCut):
    """Implement methods for enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated.
    """

    @staticmethod
    def suppliers():
        """Do nothing and return None: there is no supplier to print."""
        return None

    @classmethod
    def supplier_list(cls):
        """Return the (always empty) list of suppliers of the enzyme."""
        return []

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet. Always raises TypeError, since the enzyme has
        no supplier.
        """
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available.

        Always False for this class.
        """
        return False
1991
1992 1993 ############################################################################### 1994 # # 1995 # Restriction Batch # 1996 # # 1997 ############################################################################### 1998 1999 2000 -class RestrictionBatch(set):
2001 """Class for operations on more than one enzyme.""" 2002
2003 - def __init__(self, first=(), suppliers=()):
2004 """Initialize empty RB or pre-fill with enzymes (from supplier).""" 2005 first = [self.format(x) for x in first] 2006 first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]] 2007 set.__init__(self, first) 2008 self.mapping = dict.fromkeys(self) 2009 self.already_mapped = None 2010 self.suppliers = [x for x in suppliers if x in suppliers_dict]
2011
2012 - def __str__(self):
2013 if len(self) < 5: 2014 return '+'.join(self.elements()) 2015 else: 2016 return '...'.join(('+'.join(self.elements()[:2]), 2017 '+'.join(self.elements()[-2:])))
2018
2019 - def __repr__(self):
2020 return 'RestrictionBatch(%s)' % self.elements()
2021
2022 - def __contains__(self, other):
2023 try: 2024 other = self.format(other) 2025 except ValueError: # other is not a restriction enzyme 2026 return False 2027 return set.__contains__(self, other)
2028
2029 - def __div__(self, other):
2030 """Override '/' operator to use as search method.""" 2031 return self.search(other)
2032
2033 - def __rdiv__(self, other):
2034 """Override division with reversed operands to use as search method.""" 2035 return self.search(other)
2036
2037 - def __truediv__(self, other):
2038 """Override Python 3 division operator to use as search method. 2039 2040 Like __div__. 2041 """ 2042 return self.search(other)
2043
2044 - def __rtruediv__(self, other):
2045 """As __truediv___, with reversed operands. 2046 2047 Like __rdiv__. 2048 """ 2049 return self.search(other)
2050
2051 - def get(self, enzyme, add=False):
2052 """Check if enzyme is in batch and return it. 2053 2054 If add is True and enzyme is not in batch add enzyme to batch. 2055 If add is False (which is the default) only return enzyme. 2056 If enzyme is not a RestrictionType or can not be evaluated to 2057 a RestrictionType, raise a ValueError. 2058 """ 2059 e = self.format(enzyme) 2060 if e in self: 2061 return e 2062 elif add: 2063 self.add(e) 2064 return e 2065 else: 2066 raise ValueError('enzyme %s is not in RestrictionBatch' 2067 % e.__name__)
2068
2069 - def lambdasplit(self, func):
2070 """Filter enzymes in batch with supplied function. 2071 2072 The new batch will contain only the enzymes for which 2073 func return True. 2074 """ 2075 d = [x for x in filter(func, self)] 2076 new = RestrictionBatch() 2077 new._data = dict(zip(d, [True] * len(d))) 2078 return new
2079
2080 - def add_supplier(self, letter):
2081 """Add all enzymes from a given supplier to batch. 2082 2083 letter represents the suppliers as defined in the dictionary 2084 RestrictionDictionary.suppliers 2085 Returns None. 2086 Raise a KeyError if letter is not a supplier code. 2087 """ 2088 supplier = suppliers_dict[letter] 2089 self.suppliers.append(letter) 2090 for x in supplier[1]: 2091 self.add_nocheck(eval(x)) 2092 return
2093
2094 - def current_suppliers(self):
2095 """List the current suppliers for the restriction batch. 2096 2097 Return a sorted list of the suppliers which have been used to 2098 create the batch. 2099 """ 2100 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers) 2101 return suppl_list
2102
2103 - def __iadd__(self, other):
2104 """Override '+=' for use with sets. 2105 2106 b += other -> add other to b, check the type of other. 2107 """ 2108 self.add(other) 2109 return self
2110
2111 - def __add__(self, other):
2112 """Overide '+' for use with sets. 2113 2114 b + other -> new RestrictionBatch. 2115 """ 2116 new = self.__class__(self) 2117 new.add(other) 2118 return new
2119
2120 - def remove(self, other):
2121 """Remove enzyme from restriction batch. 2122 2123 Safe set.remove method. Verify that other is a RestrictionType or can 2124 be evaluated to a RestrictionType. 2125 Raise a ValueError if other can not be evaluated to a RestrictionType. 2126 Raise a KeyError if other is not in B. 2127 """ 2128 return set.remove(self, self.format(other))
2129
2130 - def add(self, other):
2131 """Add a restriction enzyme to the restriction batch. 2132 2133 Safe set.add method. Verify that other is a RestrictionType or can be 2134 evaluated to a RestrictionType. 2135 Raise a ValueError if other can not be evaluated to a RestrictionType. 2136 """ 2137 return set.add(self, self.format(other))
2138
2139 - def add_nocheck(self, other):
2140 """Add restriction enzyme to batch without checking its type.""" 2141 return set.add(self, other)
2142
2143 - def format(self, y):
2144 """Evaluate enzyme (name) and return it (as RestrictionType). 2145 2146 If y is a RestrictionType return y. 2147 If y can be evaluated to a RestrictionType return eval(y). 2148 Raise a ValueError in all other case. 2149 """ 2150 try: 2151 if isinstance(y, RestrictionType): 2152 return y 2153 elif isinstance(eval(str(y)), RestrictionType): 2154 return eval(y) 2155 else: 2156 pass 2157 except (NameError, SyntaxError): 2158 pass 2159 raise ValueError('%s is not a RestrictionType' % y.__class__)
2160
2161 - def is_restriction(self, y):
2162 """Return if enzyme (name) is a known enzyme. 2163 2164 True if y or eval(y) is a RestrictionType. 2165 """ 2166 return (isinstance(y, RestrictionType) or 2167 isinstance(eval(str(y)), RestrictionType))
2168
2169 - def split(self, *classes, **bool):
2170 """Extract enzymes of a certain class and put in new RestrictionBatch. 2171 2172 B.split(class, [class.__name__ = True]) -> new RestrictionBatch. 2173 2174 It works but it is slow, so it has really an interest when splitting 2175 over multiple conditions. 2176 """ 2177 def splittest(element): 2178 for klass in classes: 2179 b = bool.get(klass.__name__, True) 2180 if issubclass(element, klass): 2181 if b: 2182 continue 2183 else: 2184 return False 2185 elif b: 2186 return False 2187 else: 2188 continue 2189 return True
2190 d = [k for k in filter(splittest, self)] 2191 new = RestrictionBatch() 2192 new._data = dict(zip(d, [True] * len(d))) 2193 return new
2194
def elements(self):
    """Return the names of the enzymes in the batch, sorted.

    Every member is converted with str() and the resulting list of
    strings is returned in ascending order.
    """
    names = [str(enzyme) for enzyme in self]
    names.sort()
    return names
2201
def as_string(self):
    """Return the names of the enzymes in the batch as a list of strings.

    Each member is converted with str(); the order is the iteration order
    of the batch (no sorting is applied).
    """
    return list(map(str, self))
@classmethod
def suppl_codes(cls):
    """Return a dictionary with supplier codes.

    Maps every key of ``suppliers_dict`` to the first item of its value
    (letter code for the suppliers).
    """
    codes = {}
    for letter, entry in suppliers_dict.items():
        codes[letter] = entry[0]
    return codes
@classmethod
def show_codes(cls):
    """Print the supplier codes, one ``key = value`` pair per line."""
    lines = []
    for pair in cls.suppl_codes().items():
        lines.append(' = '.join(pair))
    print('\n'.join(lines))
    return
2224
def search(self, dna, linear=True):
    """Return a dict of cutting sites in the sequence for the batch enzymes.

    ``dna`` may be a DNA instance (it is wrapped in a FormattedSeq using
    ``linear``) or an already built FormattedSeq (its own ``linear``
    attribute is used and the ``linear`` argument is ignored).

    The result maps every enzyme of the batch to the value returned by its
    own ``search`` method.  It is stored in ``self.mapping`` and is reused
    when the same (sequence string, linear) pair is searched again.

    Raise a TypeError for any other kind of ``dna``.
    """
    #
    # here we replace the search method of the individual enzymes
    # with one unique testing method.
    #
    if not hasattr(self, "already_mapped"):
        # TODO - Why does this happen!
        # Try the "doctest" at the start of PrintFormat.py
        self.already_mapped = None
    if isinstance(dna, DNA):
        # For the searching, we just care about the sequence as a string,
        # if that is the same we can use the cached search results.
        cache_key = (str(dna), linear)
        if cache_key == self.already_mapped:
            return self.mapping
        self.already_mapped = cache_key
        target = FormattedSeq(dna, linear)
    elif isinstance(dna, FormattedSeq):
        cache_key = (str(dna), dna.linear)
        if cache_key == self.already_mapped:
            return self.mapping
        self.already_mapped = cache_key
        target = dna
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    self.mapping = dict((enzyme, enzyme.search(target)) for enzyme in self)
    return self.mapping
2256
2257 ############################################################################### 2258 # # 2259 # Restriction Analysis # 2260 # # 2261 ############################################################################### 2262 2263 2264 -class Analysis(RestrictionBatch, PrintFormat):
2265 """Provide methods for enhanced analysis and pretty printing.""" 2266
def __init__(self, restrictionbatch=None, sequence=DNA(''),
             linear=True):
    """Initialize an Analysis with RestrictionBatch and sequence.

    Analysis([restrictionbatch [, sequence] linear=True])
    -> New Analysis class.

    restrictionbatch: the enzymes to analyse with; when omitted (None) a
    new empty RestrictionBatch is created for this instance.
    sequence: the sequence to analyse; when it is non-empty the search is
    run immediately.
    linear: passed on to the search.

    For most of the methods of this class if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given a new analysis using the RestrictionBatch
    which has been given when the Analysis class has been instantiated,
    will be carried out and used.
    """
    if restrictionbatch is None:
        # Build the default per call.  A ``RestrictionBatch()`` default in
        # the signature is created once at definition time and would be
        # shared (through self.rb) by every default-constructed Analysis.
        restrictionbatch = RestrictionBatch()
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb = restrictionbatch
    self.sequence = sequence
    self.linear = linear
    if self.sequence:
        self.search(self.sequence, self.linear)
2286
def __repr__(self):
    """Return ``Analysis(<repr of rb>,<repr of sequence>,<linear>)``."""
    parts = (self.rb, self.sequence, self.linear)
    return 'Analysis(%r,%r,%s)' % parts
2290
def _sub_set(self, wanted):
    """Filter result for keys which are in wanted (PRIVATE).

    A._sub_set(other_set) -> dict.

    Internal use only.

    Return the entries of ``self.mapping`` whose enzyme is a member of
    ``wanted``.
    """
    # It seems that this method is not used in the whole class!
    selected = {}
    for enzyme, sites in self.mapping.items():
        if enzyme in wanted:
            selected[enzyme] = sites
    return selected
2303
def _boundaries(self, start, end):
    """Set boundaries to correct values (PRIVATE).

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    Values below 1 are shifted by the length of ``self.sequence``, and the
    two values are swapped if needed so that start <= end.

    Return the tuple (start, end, test) where test is
    ``self._test_normal``.  When start == end the region is empty, so the
    returned test is false for every site.

    Raise a TypeError if start or end is not an int.
    """
    if not isinstance(start, int):
        raise TypeError('expected int, got %s instead' % type(start))
    if not isinstance(end, int):
        raise TypeError('expected int, got %s instead' % type(end))
    if start < 1:  # Looks like this tries to do python list like indexing
        start += len(self.sequence)
    if end < 1:
        end += len(self.sequence)
    if start > end:
        start, end = end, start
    # Always return the triple: the previous code returned None when
    # start == end, which made every caller fail while unpacking.
    return start, end, self._test_normal
2324
def _test_normal(self, start, end, site):
    """Test if site is between start and end (PRIVATE).

    Internal use only.  The start is inclusive and the end is exclusive.
    """
    return site >= start and site < end
2331
def _test_reverse(self, start, end, site):
    """Test if site is between end and start, for circular sequences (PRIVATE).

    Internal use only.  True when site lies in [start, len(sequence)] or
    in [1, end).
    """
    if start <= site <= len(self.sequence):
        return True
    return 1 <= site < end
2338
def format_output(self, dct=None, title='', s1=''):
    """Collect data and pass to PrintFormat.

    A.format_output([dct[, title[, s1]]]) -> dct.

    If dct is not given (None) the full dictionary ``self.mapping`` is
    used.  An explicitly supplied empty dictionary is formatted as given.
    The result is whatever ``PrintFormat.format_output`` returns.
    """
    # Test for None rather than truthiness so an empty, already filtered
    # result is not silently replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return PrintFormat.format_output(self, dct, title, s1)
2349
def print_that(self, dct=None, title='', s1=''):
    """Print the output of the analysis.

    A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    All three arguments are handed unchanged to ``self.format_output``
    and its return value is printed.
    s1: Title for non-cutting enzymes
    This method is here for backwards compatibility.
    """
    text = self.format_output(dct, title, s1)
    print(text)
2362
def change(self, **what):
    """Change parameters of print output.

    `A.change(**attribute_name)` -> Change attribute of Analysis.

    Accepted keywords:

    - NameWidth, ConsoleWidth: set the value, then recompute Cmodulo and
      PrefWidth from them.
    - sequence, linear: set the value and run the search again.
    - rb: re-initialise the analysis with the new batch, keeping the
      current sequence and linear setting.
    - Indent, Maxsize: set the value.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Raise an AttributeError for Cmodulo, PrefWidth (derived values) and
    for any unknown keyword.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        elif k == 'sequence':
            setattr(self, 'sequence', v)
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None.  Do not rebind ``self`` to it, or any
            # keyword handled after 'rb' would operate on None and fail.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            setattr(self, 'linear', v)
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2398
def full(self, linear=True):
    """Return the full restriction map of the sequence.

    A.full() -> dict.

    The dictionary returned is ``self.mapping``.  The ``linear`` argument
    is accepted but is not used by this method.
    """
    complete_map = self.mapping
    return complete_map
2407
def blunt(self, dct=None):
    """Return only cuts that have blunt ends.

    Keep the entries of dct whose enzyme reports ``is_blunt()``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_blunt())
2413
def overhang5(self, dct=None):
    """Return only cuts that have 5' overhangs.

    Keep the entries of dct whose enzyme reports ``is_5overhang()``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_5overhang())
2419
def overhang3(self, dct=None):
    """Return only cuts that have 3' overhangs.

    Keep the entries of dct whose enzyme reports ``is_3overhang()``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_3overhang())
2425
def defined(self, dct=None):
    """Return only results from enzymes that produce defined overhangs.

    Keep the entries of dct whose enzyme reports ``is_defined()``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_defined())
2431
def with_sites(self, dct=None):
    """Return only results from enzyme with at least one cut.

    Keep the entries of dct whose value is non-empty.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if v)
2437
def without_site(self, dct=None):
    """Return only results from enzymes that don't cut the sequence.

    Keep the entries of dct whose value is empty.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if not v)
2443
def with_N_sites(self, N, dct=None):
    """Return only results from enzymes that cut the sequence N times.

    Keep the entries of dct whose value has length N.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if len(v) == N)
2449
def with_number_list(self, list, dct=None):
    """Return only results from enzymes that cut (x,y,z,...) times.

    Keep the entries of dct whose number of sites is a member of
    ``list``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # NOTE: the parameter name ``list`` (kept for compatibility) shadows
    # the builtin inside this method.
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if len(v) in list)
2455
def with_name(self, names, dct=None):
    """Return only results from enzymes which names are listed.

    names: iterable of enzymes.  Entries that are not in AllEnzymes are
    skipped and a BiopythonWarning is issued for each of them.  The
    iterable passed in is not modified.

    If dct is not given (None) a new RestrictionBatch is built from the
    known names and searched against ``self.sequence`` with
    ``self.linear``.  Otherwise the entries of dct whose key is one of
    the known names are returned.
    """
    # Build a filtered copy instead of deleting from ``names`` while
    # enumerating it: in-place deletion skipped the element following
    # each removed one and failed for tuples.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            warnings.warn("no data for the enzyme: %s" % enzyme,
                          BiopythonWarning)
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by a fresh search.
    if dct is None:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return dict((n, dct[n]) for n in known if n in dct)
2466
def with_site_size(self, site_size, dct=None):
    """Return only results from enzymes with a given site size.

    The enzymes considered are the members of this batch whose ``size``
    attribute equals site_size.

    If dct is not given (None) a new RestrictionBatch made of those
    enzymes is searched against ``self.sequence`` with ``self.linear``.
    Otherwise the entries of dct whose key is one of those enzymes are
    returned.
    """
    sites = [name for name in self if name.size == site_size]
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by a fresh search.
    if dct is None:
        # Pass self.linear so a circular analysis is not silently
        # searched as linear.
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Membership must be tested against the selected enzymes; testing
    # against the integer site_size raised a TypeError.
    return dict((k, v) for k, v in dct.items() if k in sites)
2473
def only_between(self, start, end, dct=None):
    """Return only results from enzymes that only cut within start, end.

    An entry is kept when it has at least one site and every one of its
    sites passes the test returned by ``self._boundaries(start, end)``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if sites and all(test(start, end, site) for site in sites))
2491
def between(self, start, end, dct=None):
    """Return only results from enzymes that cut at least within borders.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    An entry is kept, with all of its sites, when at least one site
    passes the test returned by ``self._boundaries(start, end)``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if any(test(start, end, site) for site in sites))
2509
def show_only_between(self, start, end, dct=None):
    """Return only results from within start, end.

    The enzymes reported are those selected by
    ``self.between(start, end, dct)``.  For each of them only the cutting
    positions inside the region are listed: start <= position <= end when
    start <= end, otherwise position >= start or position <= end.
    """
    if start <= end:
        def inside(position):
            return start <= position <= end
    else:
        def inside(position):
            return start <= position or position <= end

    shown = {}
    for enzyme, positions in self.between(start, end, dct).items():
        shown[enzyme] = [p for p in positions if inside(p)]
    return shown
2524
def only_outside(self, start, end, dct=None):
    """Return only results from enzymes that only cut outside start, end.

    Enzymes that cut the sequence outside of the region
    in between start and end but do not cut inside.

    An entry is kept when it has at least one site and none of its sites
    passes the test returned by ``self._boundaries(start, end)``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if sites and
                not any(test(start, end, site) for site in sites))
2546
def outside(self, start, end, dct=None):
    """Return only results from enzymes that at least cut outside borders.

    Enzymes that cut outside the region in between start and end.
    They may cut inside as well.

    An entry is kept, with all of its sites, when at least one site fails
    the test returned by ``self._boundaries(start, end)``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if any(not test(start, end, site) for site in sites))
2565
def do_not_cut(self, start, end, dct=None):
    """Return only results from enzymes that don't cut between borders.

    The result combines, from the same dictionary, the enzymes without
    any site and the enzymes selected by
    ``self.only_outside(start, end, dct)``.
    If dct is not given (None) ``self.mapping`` is used; an explicitly
    supplied empty dictionary yields an empty result.
    """
    # ``is None`` (not truthiness) so that an empty, already filtered
    # dictionary is not replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    # Take the non-cutters from dct itself.  The previous code always took
    # them from the full mapping, so enzymes absent from a supplied dct
    # leaked into the result.
    result = dict((k, v) for k, v in dct.items() if not v)
    if dct:
        # Nothing to look up for an empty dictionary.
        result.update(self.only_outside(start, end, dct))
    return result


#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# Restriction_Dictionary holds two dictionaries: one for the types (called
# pseudo-types here, as they really correspond to the values that instances
# of RestrictionType can take) and one for the enzymes. The reason for the
# split is efficiency: the bases are evaluated only once per pseudo-type.
#
# However, Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. This processing takes place only once.
# This inefficiency is largely compensated by the use of a metaclass,
# which provides a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else branches in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO
# switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...).
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have
    # been removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each base-class name is turned into the class
    # object of that name.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # Now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # We add the enzyme to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = RestrictionBatch(CommOnly)
AllEnzymes.update(NonComm)
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x  # noqa
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
# At module level locals() is the module namespace, so every enzyme becomes
# a module attribute under its own name.
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm') + tuple(names)
# Remove the loop and helper variables from the module namespace.
del k, enzymes, TYPE, bases, names